Correlation and Correlation Matrices


In [2]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Read file into a Pandas dataframe
from pandas import DataFrame, read_csv
f = 'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'
df = read_csv(f)
df=df[0:10]
df


Out[2]:
M 0.455 0.365 0.095 0.514 0.2245 0.101 0.15 15
0 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 0.070 7
1 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 0.210 9
2 M 0.440 0.365 0.125 0.5160 0.2155 0.1140 0.155 10
3 I 0.330 0.255 0.080 0.2050 0.0895 0.0395 0.055 7
4 I 0.425 0.300 0.095 0.3515 0.1410 0.0775 0.120 8
5 F 0.530 0.415 0.150 0.7775 0.2370 0.1415 0.330 20
6 F 0.545 0.425 0.125 0.7680 0.2940 0.1495 0.260 16
7 M 0.475 0.370 0.125 0.5095 0.2165 0.1125 0.165 9
8 F 0.550 0.440 0.150 0.8945 0.3145 0.1510 0.320 19
9 F 0.525 0.380 0.140 0.6065 0.1940 0.1475 0.210 14

In [ ]:
Correlation matrix plotting function:

In [9]:
# Correlation matric plotting function
def correlation_matrix(df):
    from matplotlib import pyplot as plt
    from matplotlib import cm as cm

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    cmap = cm.get_cmap('jet', 30)
    cax = ax1.imshow(df.corr(), interpolation="nearest", cmap=cmap)
    ax1.grid(True)
    plt.title('Abalone Feature Correlation')
    labels=['Sex','Length','Diam','Height','Whole','Shucked','Viscera','Shell','Rings',]
    ax1.set_xticklabels(labels,fontsize=6)
    ax1.set_yticklabels(labels,fontsize=6)
    # Add colorbar, make sure to specify tick locations to match desired ticklabels
    fig.colorbar(cax, ticks=[.75,.8,.85,.90,.95,1])
    plt.show()

correlation_matrix(df)